Load generic libraries
source('configuration.r')
Load plot specific libraries
library(foreach)
library(readr)
suppressMessages(library(igraph))
library(ggraph)
library(ggalluvial)
Create graph edge data
dat <- read.table('../tables/antibiotics_gene_linkage.tsv', head=FALSE, stringsAsFactors = FALSE) %>%
mutate(id=paste0(V1,':',V2, ':', V3)) %>%
mutate(V6=str_replace(V6, 'PheCmlA5', 'Phe')) %>%
mutate(V6=str_replace(V6, 'Far1_Fcd', 'Far1_Bla'))
edges.full <- foreach(id=unique(dat$id), .combine=rbind) %do% {
tmp <- dat[dat$id == id, ]
if(nrow(tmp) < 2) {
NULL
}else{
edge <- data.frame(t(combn(sort(tmp$V6), 2)), id=id)
edge$dist <- foreach(r=1:nrow(edge), .combine = c) %do% {
g1 <- sort(tmp[tmp$V6 == edge[r, ]$X1, 4:5])
g2 <- sort(tmp[tmp$V6 == edge[r, ]$X2, 4:5])
min(abs(g1[2]-g2[1]), abs(g1[1]-g2[2]))
}
edge
}
}
## save for ad hoc analysis
write.table(edges.full, '../output_tables/ar_gene_linkage_edge_list_uncollapsed.tsv', row.names = F, quote=F, sep='\t')
Function for ploting
get_graph_data <- function(pattern='', reverse=FALSE, dist.fil=Inf){
if(reverse){
edges.collapsed <- filter(edges.full, !str_detect(id, pattern))
}else{
edges.collapsed <- filter(edges.full, str_detect(id, pattern))
}
## filtering and collapse into edge weights
edges.collapsed <- edges.collapsed %>%
group_by(X1, X2) %>%
summarise(n=length(dist), weight=sum(dist<dist.fil)) %>%
filter(n>1) ### keep edges with more than 1 support
## Run MCL graph clustering
write.table(edges.collapsed %>% filter(weight>0) %>% select(-n),
'tmp_edges.tsv', quote=F, sep='\t', col.names = F, row.names = F)
system('/mnt/software/bin/mcl tmp_edges.tsv --abc -o tmp_mcl.tsv -overlap keep -I 3 2>/dev/null')
mcl <- read_tsv('tmp_mcl.tsv', col_names = F)
grp <- foreach(c=1:nrow(mcl), .combine = 'rbind') %do%{
tmp <- as.character(na.exclude(as.character(mcl[c,])))
data.frame(vertex=tmp, grp=c, stringsAsFactors = F)
} %>% data.frame(row.names = 1)
grp$grp[grp$grp >= (which(table(grp) < 3)[1])] <- NA ## no annotation for clusters with 2 genes only
## generate graph data
g <- graph_from_data_frame(edges.collapsed[,1:3], directed=FALSE)
## vertices
colors <- pal_simpsons('springfield')(16)
n.colors <- colors[as.numeric(as.factor(str_replace(V(g)$name, '.*_', '')))]
V(g)$color <- n.colors
V(g)$community <- grp[V(g)$name,]##cluster_walktrap(g)$membership
V(g)$Type <- str_replace(V(g)$name, '.*_', '')
## Edges
E(g)$width <- edges.collapsed$n
E(g)$community <-
apply(as.data.frame(get.edgelist(g, names=FALSE)), 1,
function(x)
ifelse(V(g)$community[x[1]] == V(g)$community[x[2]],
LETTERS[V(g)$community[x[1]]], NA))
## format vertex names
aux <- function(x){
if(length(x) > 2){
paste0(x[1],'_',x[2])
}else{
x[1]
}
}
V(g)$name <- sapply(str_split(V(g)$name, '_'), aux)
#### carbapenemase
args <- read.table('../metadata/Bla_Carb.dat', head=F, stringsAsFactors=F)[,1]
carb <- setNames(rep('No', length(V(g)$name)),V(g)$name)
carb[names(carb) %in% args] <- 'Yes'
V(g)$carb <- carb
g
}
Main figure of all genes
g <- get_graph_data(dist.fil = 5e3)
g_cols <- c((pal_lancet("lanonc")(9))[c(2:8,1)], pal_ucscgb("default")(26))[-c(2,7,8,9,10,12,17,18,19,20,21,23,24,26,27)]
## plot(1:19, 1:19, type="p", pch=65:(65+19-1), cex=2, col=g_cols) ## get legend alphabets
layout = create_layout(g, layout = 'igraph', algorithm = 'kk', kkconst=0.1)
p <- ggraph(layout)+#, circular=TRUE) +
geom_edge_arc(aes(width=((!is.na(community))+1), alpha=as.factor(is.na(community)), color=community),
curvature = 0.0,
#alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
scale_edge_alpha_manual(values=c(0.8, 0.3), guide=FALSE) +
geom_node_point(aes(fill=Type, color=carb),size=5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(1,1.5)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/fig3a.ar_gene_linkage_graph.pdf', p, width=15, height=17)
Format data
edge.dat <- data.frame(v=V(g)$name, g=LETTERS[V(g)$community]) %>% filter(!is.na(g))
genome.info <- read.table('../tables/genome_info.dat', sep = '\t')
merge(dat, genome.info, by=c(1,2), all.x=TRUE) %>%
filter(!is.na(V6.y) | V1=='plasmid') %>% ## only take the high/medium quality ones
select(Species=V1, gene=V6.x) %>% mutate(gene=str_replace(gene, '_.*','')) -> sp.dat
sankey.dat <-
merge(sp.dat, edge.dat, by.x=2, by.y=1, all.y=T) %>%
mutate(Species=str_replace(Species,'_',' ')) %>%
group_by(Species, g) %>%
count %>%
mutate(Antibiotics_cluster=as.character(g)) %>%
filter(n>3) %>%
ungroup()
sankey.dat$Species <- factor(sankey.dat$Species, ordered = TRUE, levels = c("Acinetobacter baumannii",
"Acinetobacter sp.",
"Burkholderia lata",
"Enterobacter cloacae",
"Klebsiella pneumoniae",
"Pseudomonas aeruginosa",
"Corynebacterium striatum",
"Enterococcus faecalis",
"Enterococcus faecium",
"Enterococcus sp.",
"Staphylococcus aureus",
"Staphylococcus capitis",
"Staphylococcus cohnii",
"Staphylococcus epidermidis",
"Staphylococcus haemolyticus",
"Staphylococcus hominis",
"Staphylococcus lugdunensis",
"Staphylococcus warneri",
"plasmid"))
sankey.dat$Antibiotics_cluster <- factor(sankey.dat$Antibiotics_cluster, ordered=TRUE, levels=c(
"A", "G", "J", "K", "L", "M", "N", "B", "C", "D", "E", "F", "H", "I"
))
## mutate(Species=sapply(Species, function(x) ifelse(x=='plasmid', NA, x)))
Plot
p <- ggplot(subset(sankey.dat, Species!='plasmid'),aes(y=log10(n), axis1=Species, axis2=Antibiotics_cluster)) +
geom_alluvium(aes(fill=Antibiotics_cluster), width=1/5) +
geom_stratum(width=1/5, fill=NA) +
geom_text(stat = "stratum", label.strata = TRUE, size=6, fontface = "italic", nudge_x=-0.12, hjust=1) +
scale_fill_manual(values=g_cols[match(levels(sankey.dat$Antibiotics_cluster) , LETTERS[1:14])]) +
scale_x_discrete(limits = c("Species", "Antibiotics Cluster"), expand = c(.5, .05)) +
theme_void() +
theme()
p
ggsave('../plots/fig3b.ar_gene_linkage_sankey.pdf', p, width=25, height=25)
Staphylococcus aureus network
g <- get_graph_data('Staphylococcus_aureus')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size=5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/sup11.staph_aureus_ar_gene_linkage_graph.pdf', p, width=5, height=5)
Genome network
g <- get_graph_data('plasmid', reverse = TRUE)
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size= 5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/sup12.genome_ar_gene_linkage_graph.pdf', p, width=16, height=16)
Plasmid network
g <- get_graph_data('plasmid')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
geom_edge_arc(aes(width=width, color=community),
curvature = 0.0,
alpha=0.7,
end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm')) +
scale_edge_color_manual(values=g_cols,
na.value = rgb(0.8,0.8,0.8,0.5)) +
geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
scale_color_manual(values=c('grey','firebrick')) +
geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
scale_edge_width(range = c(0.5,3)) +
scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
theme_void()
p
ggsave('../plots/sup12.plasmid_ar_gene_linkage_graph.pdf', p, width=15, height=10)
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
##
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggnewscale_0.1.9000 ggalluvial_0.9.1 ggraph_1.0.2
## [4] igraph_1.2.2 readr_1.1.1 foreach_1.4.4
## [7] ggsci_2.9 reshape2_1.4.3 stringr_1.3.0
## [10] tibble_2.0.1 tidyr_0.8.2 dplyr_0.8.0.1
## [13] gridExtra_2.3 ggplot2_3.1.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 purrr_0.3.0 colorspace_1.3-2
## [4] htmltools_0.3.6 viridisLite_0.3.0 yaml_2.1.18
## [7] utf8_1.1.4 rlang_0.3.1 pillar_1.3.1
## [10] glue_1.3.0 withr_2.1.2 tweenr_1.0.0
## [13] plyr_1.8.4 munsell_0.5.0 gtable_0.2.0
## [16] codetools_0.2-15 evaluate_0.10.1 labeling_0.3
## [19] knitr_1.20 fansi_0.4.0 Rcpp_1.0.0
## [22] scales_1.0.0 backports_1.1.2 farver_1.0
## [25] ggforce_0.1.3 hms_0.4.2 digest_0.6.18
## [28] stringi_1.3.1 ggrepel_0.8.0 rprojroot_1.3-2
## [31] cli_1.0.1 tools_3.4.4 magrittr_1.5
## [34] lazyeval_0.2.1 crayon_1.3.4 pkgconfig_2.0.2
## [37] MASS_7.3-49 assertthat_0.2.0 rmarkdown_1.9
## [40] iterators_1.0.9 viridis_0.5.1 R6_2.4.0
## [43] units_0.6-1 compiler_3.4.4